# File:	    11_descriptive.R
# Project:	Colonial legacy and foreign aid: Decomposing the colonial bias
# Authors:	Daina Chiba, Department of Government, University of Essex
#           Tobias Heinrich, Department of Political Science, University of South Carolina
# Contact:	dchiba@essex.ac.uk
# Date:     19 November, 2018

# Description: 
# This R script file replicates the descriptive statistics results reported in 
# Appendix C (Figures A.1 & A.2 and Table A.2).

rm(list=ls())
library(foreign)
library(ggplot2)
library(countrycode)
library(gmodels)

# Data manipulation -------------------------------------------------------

fa.data <- read.dta("colony-dummy.dta")

## log(0)
fa.data $ tobitDV <- fa.data $ lnGrossAid - log(.001)
fa.data $ tobitDV[fa.data $ AnyGrossAid == 0] <- 0

## Listwise delete missing values
fa.data.nna <- fa.data[, c("tobitDV", "RA", "RB", "WBdum2", "WBdum3", "WBdum4","WBdum5", "WB",
                           "colony_ever", "ccode", "rCCODE", "colony",
                           "lnDIS", "ColdWar", "lnPOPB", "dyad", "year", "rCNAME","dCNAME")]
# "Ltrade", "Ltau_glob", "Ltau2", "colony", "lnMultiAid", 

fa.data.nna <- na.omit(fa.data.nna)
fa.data.nna $ RB2 <- fa.data.nna $ RB^2

## time variables
fa.data.nna $ yearid <- fa.data.nna $ year - 1959
fa.data.nna $ yearid2 <- fa.data.nna $ yearid^2
fa.data.nna $ yearid3 <- fa.data.nna $ yearid^3
fa.data.nna $ time <- (fa.data.nna $ year - mean(fa.data.nna $ year))/sd(fa.data.nna $ year)
fa.data.nna $ time2 <- (fa.data.nna $ time)^2
fa.data.nna $ time3 <- (fa.data.nna $ time)^3

## donor
length(unique(fa.data.nna $ dCNAME))
fa.data.nna $ donor <- as.numeric(as.factor(fa.data.nna $ dCNAME))

## recipient
length(unique(fa.data.nna $ rCNAME))
fa.data.nna $ recip <- as.numeric(as.factor(fa.data.nna $ rCNAME))

## Censoring
fa.data.nna $ YLow <- ifelse(fa.data.nna $ tobitDV == 0, -Inf, fa.data.nna $ tobitDV)

# Data description --------------------------------------------------------
# (Shown in Appendix B)

## Donor-Recipients list
donor.col <- unique(fa.data.nna $ ccode[fa.data.nna $ colony == 1])
donor.nco <- unique(fa.data.nna $ ccode[fa.data.nna $ colony == 0])
recip.col <- unique(fa.data.nna $ rCCODE[fa.data.nna $ colony == 1])
recip.nco <- unique(fa.data.nna $ rCCODE[fa.data.nna $ colony == 0])

countrycode(donor.col, origin = "cown", destination = "country.name")
countrycode(donor.nco, origin = "cown", destination = "country.name")

## List of colonies
## 900 (Australia)
countrycode(
  unique(fa.data.nna $ rCCODE[fa.data.nna $ colony == 1 & fa.data.nna $ ccode == 900]),
  origin = "cown", destination = "country.name")

## 211 (Belgium)
countrycode(
  unique(fa.data.nna $ rCCODE[fa.data.nna $ colony == 1 & fa.data.nna $ ccode == 211]),
  origin = "cown", destination = "country.name")

## 220 (France)
sort(
  countrycode(
    unique(fa.data.nna $ rCCODE[fa.data.nna $ colony == 1 & fa.data.nna $ ccode == 220]),
    origin = "cown", destination = "country.name")
)

## 740 (Japan)
countrycode(
  unique(fa.data.nna $ rCCODE[fa.data.nna $ colony == 1 & fa.data.nna $ ccode == 740]),
  origin = "cown", destination = "country.name")

## 210 (Netherlands)
sort(
  countrycode(
    unique(fa.data.nna $ rCCODE[fa.data.nna $ colony == 1 & fa.data.nna $ ccode == 210]),
    origin = "cown", destination = "country.name")
)

## 2 (US)
sort(
  countrycode(
    unique(fa.data.nna $ rCCODE[fa.data.nna $ colony == 1 & fa.data.nna $ ccode == 2]),
    origin = "cown", destination = "country.name")
)

## 200 (UK)
sort(
  countrycode(
    unique(fa.data.nna $ rCCODE[fa.data.nna $ colony == 1 & fa.data.nna $ ccode == 200]),
    origin = "cown", destination = "country.name")
)

## 230 (Spain)
sort(
  countrycode(
    unique(fa.data.nna $ rCCODE[fa.data.nna $ colony == 1 & fa.data.nna $ ccode == 230]),
    origin = "cown", destination = "country.name")
)

## 235 (Portugal)
sort(
  countrycode(
    unique(fa.data.nna $ rCCODE[fa.data.nna $ colony == 1 & fa.data.nna $ ccode == 235]),
    origin = "cown", destination = "country.name")
)


col.li <- countrycode(recip.col, origin = "cown", destination = "country.name")
nco.li <- countrycode(recip.nco, origin = "cown", destination = "country.name")

nco.li[nco.li %in% col.li == FALSE]


## N
nrow(fa.data.nna)
table(fa.data.nna $ colony)

## Y
mean(fa.data.nna $ tobitDV)
mean(fa.data.nna $ tobitDV[fa.data.nna $ colony == 1])
mean(fa.data.nna $ tobitDV[fa.data.nna $ colony == 0])

## percentages
table(fa.data.nna $ colony)[2]/(table(fa.data.nna $ colony)[1] + table(fa.data.nna $ colony)[2])

sum(exp(fa.data.nna $ tobitDV[fa.data.nna $ colony == 1]))/sum(exp(fa.data.nna $ tobitDV))


# Observable diff for continuous variables --------------------------------

## Rearrange the data set
fa.data.nna$coldum <- "Non-colonial dyads"
fa.data.nna$coldum[fa.data.nna$colony==1] <- "Colonial dyads"
m.dv <- data.frame(x = fa.data.nna$tobitDV, coldum = fa.data.nna$coldum, vname = "Bilateral Aid")
m.RA <- data.frame(x = fa.data.nna$RA, coldum = fa.data.nna$coldum, vname = "Resource (Donor)")
m.RB <- data.frame(x = fa.data.nna$RB, coldum = fa.data.nna$coldum, vname = "Resource (Recipient)")
m.WB <- data.frame(x = fa.data.nna$WB, coldum = fa.data.nna$coldum, vname = "W (Recipient)")
m.DIS <- data.frame(x = fa.data.nna$lnDIS, coldum = fa.data.nna$coldum, vname = "Distance")
m.CW <- data.frame(x = fa.data.nna$ColdWar, coldum = fa.data.nna$coldum, vname = "Cold War")
m.POPB <- data.frame(x = fa.data.nna$lnPOPB, coldum = fa.data.nna$coldum, vname = "Population (Recipient)")

plot.data.1 <- rbind(m.dv, m.RA, m.RB)
plot.data.2 <- rbind(m.DIS, m.POPB)
cbgFillPalette <- scale_fill_manual(values=c("gray50","gray90"))

g <- ggplot(plot.data.1, aes(x)) + cbgFillPalette + theme_bw()
g <- g + geom_histogram() + xlab("") + ylab("") + aes(y = ..density..)
g <- g + facet_grid( coldum ~ vname, scales="free")
g <- g + theme(axis.title.x=element_blank())
print(g)
ggsave(file="output/figures/FigureA1_obsdiff1.pdf",width=10,height=5)

g <- ggplot(plot.data.2, aes(x)) + cbgFillPalette + theme_bw()
g <- g + geom_histogram() + xlab("") + ylab("") + aes(y = ..density..)
g <- g + facet_grid( coldum ~ vname, scales="free")
g <- g + theme(axis.title.x=element_blank())
print(g)
ggsave(file="output/figures/FigureA1_obsdiff2.pdf",width=10,height=5)


##	Observable diff for categorical variables
plot.data <- rbind(m.WB, m.CW)

g <- ggplot(plot.data, aes(x)) + cbgFillPalette + theme_bw()
g <- g + geom_histogram() + xlab("") + ylab("") + aes(y = ..density..)
g <- g + facet_grid( coldum ~ vname, scales="free")
g <- g + theme(axis.title.x=element_blank())
print(g)
ggsave(file="output/figures/FigureA2_obsdiff_cat.pdf",width=4,height=5)


# Difference of means test ------------------------------------------------
# (Table A.2)

t.test(fa.data.nna $ tobitDV ~ fa.data.nna $ colony)
# t = -57.1608, df = 2819.485, p-value < 2.2e-16
# 95 percent confidence interval:
#   -1.169817 -1.112107
# mean in group 0 mean in group 1 
# 3.508744        8.275837 

t.test(fa.data.nna $ RA ~ fa.data.nna $ colony)
# t = -77.5252, df = 3544.025, p-value < 2.2e-16
# 95 percent confidence interval:
#   -1.169817 -1.112107
# mean in group 0 mean in group 1 
# 9.945155       11.086117 

t.test(fa.data.nna $ RB ~ fa.data.nna $ colony)
# t = 9.7832, df = 2888.45, p-value < 2.2e-16
# 95 percent confidence interval:
#   0.2537849 0.3810134
# mean in group 0 mean in group 1 
# 7.926673        7.609274 

t.test(fa.data.nna $ lnDIS ~ fa.data.nna $ colony)
# t = 20.399, df = 2902.43, p-value < 2.2e-16
# 95 percent confidence interval:
#   0.1834985 0.2225262
# mean in group 0 mean in group 1 
# 8.366509        8.163496 

t.test(fa.data.nna $ lnPOPB ~ fa.data.nna $ colony)
# t = 6.8974, df = 2853.336, p-value = 6.498e-12
# 95 percent confidence interval:
#   0.1746914 0.3134656
# mean in group 0 mean in group 1 
# 1.724090        1.480012 


# KS tests

ks.test(x = fa.data.nna $ tobitDV[fa.data.nna $ colony==0], y = fa.data.nna $ tobitDV[fa.data.nna $ colony==1])
# D = 0.5314, p-value < 2.2e-16

ks.test(x = fa.data.nna $ RA[fa.data.nna $ colony==0], y = fa.data.nna $ RA[fa.data.nna $ colony==1])
# D = 0.4917, p-value < 2.2e-16

ks.test(x = fa.data.nna $ RB[fa.data.nna $ colony==0], y = fa.data.nna $ RB[fa.data.nna $ colony==1])
# D = 0.1074, p-value < 2.2e-16

ks.test(x = fa.data.nna $ lnDIS[fa.data.nna $ colony==0], y = fa.data.nna $ lnDIS[fa.data.nna $ colony==1])
# D = 0.269, p-value < 2.2e-16

ks.test(x = fa.data.nna $ lnPOPB[fa.data.nna $ colony==0], y = fa.data.nna $ lnPOPB[fa.data.nna $ colony==1])
# D = 0.0721, p-value = 4.782e-12



# Wilcox tests for categorical variables ----------------------------------

wilcox.test(fa.data.nna $ WB ~ fa.data.nna $ colony) 
# W = 108016082, p-value = 0.00128

CrossTable(fa.data.nna $ ColdWar, fa.data.nna $ colony, 
           chisq = T, prop.r = F, prop.c = F)
# Chi^2 =  0.3754766     d.f. =  1     p =  0.5400341 



# For control variables ---------------------------------------------------
## Listwise delete missing values
fa.data.nna2 <- fa.data[, c("tobitDV", "RA", "RB", "WBdum2", "WBdum3", "WBdum4","WBdum5", "WB",
                            "colony_ever", "ccode", "rCCODE", "colony", "Ltrade", "Ltau_glob", "Ltau2", "colony", "lnMultiAid", 
                            "lnDIS", "ColdWar", "lnPOPB", "dyad", "year", "rCNAME","dCNAME")]

fa.data.nna2 <- na.omit(fa.data.nna2)


# Produce Table A.2 -------------------------------------------------------

sink(file = "output/tables/tableA2.txt")
print("Table A.2: Observable differences")
print("KS test for Bilateral Aid")
t.test(fa.data.nna $ tobitDV ~ fa.data.nna $ colony)
ks.test(x = fa.data.nna $ tobitDV[fa.data.nna $ colony==0], y = fa.data.nna $ tobitDV[fa.data.nna $ colony==1])

print("KS test for Donor Resources")
t.test(fa.data.nna $ RA ~ fa.data.nna $ colony)
ks.test(x = fa.data.nna $ RA[fa.data.nna $ colony==0], y = fa.data.nna $ RA[fa.data.nna $ colony==1])

print("KS test for Recipient Resources")
t.test(fa.data.nna $ RB ~ fa.data.nna $ colony)
ks.test(x = fa.data.nna $ RB[fa.data.nna $ colony==0], y = fa.data.nna $ RB[fa.data.nna $ colony==1])

print("KS test for Recipient Population")
ks.test(x = fa.data.nna $ lnPOPB[fa.data.nna $ colony==0], y = fa.data.nna $ lnPOPB[fa.data.nna $ colony==1])
ks.test(x = fa.data.nna $ lnPOPB[fa.data.nna $ colony==0], y = fa.data.nna $ lnPOPB[fa.data.nna $ colony==1])

print("Wilcox test for Recipient W")
t.test(fa.data.nna $ WB ~ fa.data.nna $ colony) 
wilcox.test(fa.data.nna $ WB ~ fa.data.nna $ colony) 

print("Chi2 test for Cold War")
t.test(fa.data.nna $ ColdWar ~ fa.data.nna $ colony) 
CrossTable(fa.data.nna $ ColdWar, fa.data.nna $ colony, 
           chisq = T, prop.r = F, prop.c = F)

print("KS test for Distance")
t.test(fa.data.nna $ lnDIS ~ fa.data.nna $ colony)
ks.test(x = fa.data.nna $ lnDIS[fa.data.nna $ colony==0], y = fa.data.nna $ lnDIS[fa.data.nna $ colony==1])

print("KS test for Multilateral Aid")
t.test(fa.data.nna2 $ lnMultiAid ~ fa.data.nna2 $ colony)
ks.test(x = fa.data.nna2 $ lnMultiAid[fa.data.nna2 $ colony==0], y = fa.data.nna2 $ lnMultiAid[fa.data.nna2 $ colony==1])

print("KS test for Trade")
t.test(fa.data.nna2 $ Ltrade ~ fa.data.nna2 $ colony)
ks.test(x = fa.data.nna2 $ Ltrade[fa.data.nna2 $ colony==0], y = fa.data.nna2 $ Ltrade[fa.data.nna2 $ colony==1])

print("KS test for Alignment")
t.test(fa.data.nna2 $ Ltau_glob ~ fa.data.nna2 $ colony)
ks.test(x = fa.data.nna2 $ Ltau_glob[fa.data.nna2 $ colony==0], y = fa.data.nna2 $ Ltau_glob[fa.data.nna2 $ colony==1])
sink()

# End of file